Setup

The settings list settings configure which parts, simulations, and analysis steps of this R markdown are executed. This way single components of the analysis can be disabled for the purpose of saving computation time.

settings <- list(
  # Application of the full DANA pipeline to the benchmark data
  bench.DANA=T,
  # Application of the full DANA pipeline to the test data
  test.DANA=T,
  # Generate interactive plots of co-expression structures
  investigate.coexpression=F,
  # Perform Differential Expression Analysis for test and benchmark data sets
  perform.DEA=T,
  # Generate paper figures
  generate.paper.figs=T,
  # Specify if paper figures are exported
  export.figures = T,
  # Path for file exports
  paper.fig.path = "../danawriteup/figs/",
  # Clears environment
  debug=TRUE
)
if(settings$debug) rm(list=ls()[ls()!="settings"])

Parameter configuration

The config list contains all parameters for the analysis. Remember to always set the working directory and the paths to the data/external files prior to running the code.

config <- list(
  DANA.path = "./R/DANA.R",
  
  ## Data
  case.name = "UCEC",
  # Read count data file for the full UCEC data set
  data.file.path = "data/TCGA_harmonized_UCEC.RData",
  # RData file for coordinates data frame based on miRBase miRNA definitions
  coords.file.path = "data/miRBase.coords.RData",
  
  ## Normalization Methods
  # Specify which normalization methods will be applied
  norm.apply.TC = TRUE,
  norm.apply.UQ = TRUE,
  norm.apply.med = TRUE,
  norm.apply.TMM = TRUE,
  norm.apply.DESeq = TRUE,
  norm.apply.PoissonSeq = TRUE,
  norm.apply.QN = TRUE,
  norm.apply.RUV = TRUE,
  
  # thresholds for zero-expressed, poorly-expressed and well-expressed genes 
  t.zero = 2,  # zero-expressed in [0, t.zero)
  t.poor = 5,  # poorly-expressed in [t.zero, t.well]
  t.well = 2^6,  # well-expressed in [t.well, inf)
  # distance threshold for clustering
  cluster.threshold = 10000,
  # preprocessing data transformation type: none ("n"), modified log2 ("log2"),
  # or cube root ("cube") available
  preprocess.transform = "log2",
  
  ## Correlation Estimation for positive and negative controls
  # Available methods | Tuning parameter calibration schemes
  # "mb"              | "cv", "aic", "bic", "av"
  # "huge.mb"         | "ric", "stars"
  # "glasso"          | "ric", "stars", "ebic"
  # "fastggm"         | none
  # "pearson"         | none
  # "spearman"        | none
  
  # Positive Controls
  corr.method.pos = "mb",
  tuning.criterion.pos = "bic",
  # Negative Controls
  corr.method.neg = "pearson",
  tuning.criterion.neg = "",
  # Generate plots within DANA
  generate.plots = FALSE  # generate comparison plots
)

Configuration for the differential expression analysis

config.DEA <- list(
  ## Data
  case.name = "UCEC",

  ## Normalization Methods
  # Specify which normalization methods will be applied
  norm.apply.TC = TRUE,
  norm.apply.UQ = TRUE,
  norm.apply.med = TRUE,
  norm.apply.TMM = TRUE,
  norm.apply.DESeq = TRUE,
  norm.apply.PoissonSeq = TRUE,
  norm.apply.QN = TRUE,
  norm.apply.RUV = TRUE,
  
  # Significance level for DEA
  alpha = 0.01,
  # Plots
  generate.volcano.plot = TRUE,
  generate.meanvar.plot = TRUE,
  # Use q-values (adjusted p-values) instead of p-values
  q.values = FALSE,
  # RUV reduces the parameter size. Reduce DEA result to RUV genes?
  perform.subsetting = FALSE
)

Load Packages

Load all required R libraries/packages.

# DANA functions
source(config$DANA.path)
# Libraries
library(openxlsx)  # read excel files
library(corrplot) # For mixed correlation plots
library(cowplot) # Arrange plots
library(RColorBrewer) # Colors
library(latex2exp) # for latex in axis labels 
library(ggcorrplot) # ggplot2 correlation plots

Load Data

Load the dataset under study

We load the data set into memory.

# Load the p x n matrix of read counts into the workspace
load(config$data.file.path)

# Unlist the data
benchmark <- TCGA.UCEC$benchmark
test <- TCGA.UCEC$test
benchmark.end <- benchmark$END
benchmark.ser <- benchmark$SER
test.end <- test$END
test.ser <- test$SER


# Transform gene names to lower case
rownames(benchmark.end) <- tolower(rownames(benchmark.end))
rownames(benchmark.ser) <- tolower(rownames(benchmark.ser))
rownames(test.end) <- tolower(rownames(test.end))
rownames(test.ser) <- tolower(rownames(test.ser))

# Rename
test.RC <- test <- cbind(test.end, test.ser) 
test.groups <- c(rep("END", dim(test.end)[2]), rep("SER", dim(test.ser)[2]))
bench.RC <- benchmark <- cbind(benchmark.end, benchmark.ser)  
bench.groups <- c(rep("END", dim(benchmark.end)[2]), rep("SER", dim(benchmark.ser)[2]))

# Inspect
cat("Dimensions of test (mixed-batch) data: ", dim(test.RC), "\n")
## Dimensions of test (mixed-batch) data:  1881 48
cat("Dimensions of benchmark (single-batch) data: ", dim(bench.RC), "\n")
## Dimensions of benchmark (single-batch) data:  1881 48

miRNA Coordinates

Load corresponding miRNA chromosome coordinates

A coordinates data frame that specifies the base pair location of each miRNA of the data set on the chromosomes is loaded.

# Load miRNA chromosome location coordinates data frame
load(config$coords.file.path)
coords <- coords  # change according to the name of the loaded data matrix

Remove genes that cannot be found in the coordinates data frame

Some miRNAs map to multiple locations of sequence families. We named the sequence family with a parenthesis that reflects the number of members from all the different genomic locations (e.g. hsa-let-7a(3)). These miRNAs cannot be uniquely mapped to the genome, therefore we must exclude these from the location based analysis.

# only consider genes that are present in the coordinate data frame
genes.in.coords <- rownames(coords)[na.omit(match(rownames(test.RC), rownames(coords)))]
cat(dim(test.RC)[1]-length(genes.in.coords), " genes not found in coords. Reducing data set to ", length(genes.in.coords), "genes.\n")
## 33  genes not found in coords. Reducing data set to  1848 genes.
# test data set
test.RC <- test.RC[genes.in.coords, ]
# benchmark data set
bench.RC <- bench.RC[genes.in.coords, ]
genes <- rownames(test.RC)
rm(genes.in.coords)

Examine the Data

First, we investigate the distribution of mean miRNA expression of the data.

# Histogram plot test data set
par(mfrow=c(1,1))
df <- data.frame(log.expression=log2(rowMeans(test.RC)+1))
print(ggplot(df, aes(x=log.expression)) + 
        geom_histogram(binwidth = .1, color="black", fill="black") +
        geom_vline(xintercept = log2(config$t.zero+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.poor+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.well+1), color="red",  linetype="dashed") +
        ggtitle("mixed-batch data set"))

rm(df)

# Histogram plot benchmark data set
par(mfrow=c(1,1))
df <- data.frame(log.expression=log2(rowMeans(bench.RC)+1))
print(ggplot(df, aes(x=log.expression)) + 
        geom_histogram(binwidth = .1, color="black", fill="black") +
        geom_vline(xintercept = log2(config$t.zero+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.poor+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.well+1), color="red",  linetype="dashed") +
        ggtitle("single-batch data set"))

rm(df)

We observe that there is a large proportion of poorly-expressed genes. Some of them have extremely low or zero mean expression which corresponds to essentially zero reads.

DANA - Performance Analysis of Normalization Methods

We apply the full analysis pipeline to the data set. This includes:

  1. Apply the normalization methods to the raw counts.
  2. Define positive and negative controls based on read counts.
  3. Estimate the level of coexpression between miRNAs for each data set.
  4. Compare positive and negative controls before and after normalization.

The following parameters are used:

  • Case name: UCEC
  • Control definition:
    • Definition of negative controls, read count in [2, 5]
    • Definition of positive controls, read count in [64, inf]
    • Distance threshold for clustering: 10^{4}
  • Data preprocessing transformation: log2
  • Coexpression estimation:
    • Correlation estimation method for positive controls: mb
    • Correlation estimation tuning parameter calibration method for positive controls: bic
  • Comparison statistics:
    • Generate plots? FALSE

Benchmark (single-batch) Data Set

if(settings$bench.DANA) {
  genes <- rownames(bench.RC)
  
  ## Apply Normalization
  data.norm <- normalize(X=bench.RC,
                         groups=bench.groups,
                         name="benchmark",
                         apply.TC=config$norm.apply.TC,
                         apply.UQ=config$norm.apply.UQ,
                         apply.med=config$norm.apply.med,
                         apply.TMM=config$norm.apply.TMM,
                         apply.DESeq=config$norm.apply.DESeq,
                         apply.PoissonSeq=config$norm.apply.PoissonSeq,
                         apply.QN=config$norm.apply.QN,
                         apply.RUV=config$norm.apply.RUV)
  bench.norm <- data.norm
  
  ## Define Controls
  pre.res <- define.controls(bench.RC,
                             t.zero=config$t.zero,
                             t.poor=config$t.poor,
                             t.well=config$t.well,
                             t.cluster=config$cluster.threshold,
                             coords=coords,
                             clusters=NULL)
  pos.controls <- pre.res$genes.pos
  pos.controls.bench <- pre.res$genes.pos
  neg.controls <- pre.res$genes.neg
  neg.controls.bench <- pre.res$genes.neg
  clusters <- pre.res$clusters
  clusters.bench <- clusters
  
  # Apply DANA pipeline
  DANA.bench <- DANA(data.RC=bench.RC, 
                     data.norm=bench.norm, 
                     pos.controls=pos.controls.bench, 
                     neg.controls=neg.controls.bench, 
                     clusters=clusters.bench,
                     coords=coords,
                     case.name="benchmark",
                     generate.plots=config$generate.plots,
                     preprocess.transform=config$preprocess.transform,
                     corr.method.pos=config$corr.method.pos,
                     tuning.criterion.pos=config$tuning.criterion.pos,
                     corr.method.neg=config$corr.method.neg,
                     tuning.criterion.neg=config$tuning.criterion.neg) 
  
  if(!config$generate.plots) {
    stargazer(DANA.bench$metrics, type="text", summary=FALSE, digits=4, 
              title="Comparison metrics for the single-batch data set", align=TRUE)
  }
  iplot.bench <- iggplot.corr(DANA.bench$data.model$pos$corr, clusters=clusters, title="Single-batch data set - Positive controls (un-normalized)", coords=coords)
  iplot.bench.TMM <- iggplot.corr(DANA.bench$norm.models$benchmark.TMM$pos$corr, clusters=clusters, title="Single-batch data set - Positive controls (TMM)", coords=coords)
}
## 973 genes has been filtered because they contains too small number of reads across the experiments.Number of positive control markers with RC in [64, inf): 127
## Number of negative control markers with RC in [2, 5]: 123
## Estimating correlations for pos. and neg. controls for data set benchmark...done
## Estimating correlations for pos. and neg. controls for data set benchmark.TMM...done
## Estimating correlations for pos. and neg. controls for data set benchmark.DESeq...done
## Estimating correlations for pos. and neg. controls for data set benchmark.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set benchmark.TC...done
## Estimating correlations for pos. and neg. controls for data set benchmark.Med...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVg...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVs...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVr...done
## Estimating correlations for pos. and neg. controls for data set benchmark.UQ...done
## Estimating correlations for pos. and neg. controls for data set benchmark.QN...done
## Comparing models benchmark vs. benchmark.TMM....done
## Comparing models benchmark vs. benchmark.DESeq....done
## Comparing models benchmark vs. benchmark.PoissonSeq....done
## Comparing models benchmark vs. benchmark.TC....done
## Comparing models benchmark vs. benchmark.Med....done
## Comparing models benchmark vs. benchmark.RUVg....done
## Comparing models benchmark vs. benchmark.RUVs....done
## Comparing models benchmark vs. benchmark.RUVr....done
## Comparing models benchmark vs. benchmark.UQ....done
## Comparing models benchmark vs. benchmark.QN....done
## 
## Comparison metrics for the single-batch data set
## ===================================
##                        cc    mscr  
## -----------------------------------
##    benchmark.TMM     0.9844 0.1153 
##   benchmark.DESeq    0.9938 0.1069 
## benchmark.PoissonSeq 0.9942 0.1499 
##     benchmark.TC     0.9821 -0.2690
##    benchmark.Med     0.9136 0.1675 
##    benchmark.RUVg    0.9832 0.0557 
##    benchmark.RUVs    0.9410 0.0895 
##    benchmark.RUVr    0.9702 0.1971 
##     benchmark.UQ     0.9239 0.1283 
##     benchmark.QN     0.9333 0.1558 
## -----------------------------------

Test (Mixed-Batch) Data Set

if(settings$test.DANA) {
  genes <- rownames(test.RC)
  
  ## Apply Normalization
  data.norm <- normalize(test.RC,
                         groups=test.groups,
                         name="test",
                         apply.TC=config$norm.apply.TC,
                         apply.UQ=config$norm.apply.UQ,
                         apply.med=config$norm.apply.med,
                         apply.TMM=config$norm.apply.TMM,
                         apply.DESeq=config$norm.apply.DESeq,
                         apply.PoissonSeq=config$norm.apply.PoissonSeq,
                         apply.QN=config$norm.apply.QN,
                         apply.RUV=config$norm.apply.RUV)
  test.norm <- data.norm
  
  # Define Controls  
  pre.res <- define.controls(test.RC,
                             t.zero=config$t.zero,
                             t.poor=config$t.poor,
                             t.well=config$t.well,
                             t.cluster=config$cluster.threshold,
                             coords=coords,
                             clusters=NULL)
  pos.controls <- pre.res$genes.pos
  pos.controls.test <- pre.res$genes.pos
  neg.controls <- pre.res$genes.neg
  neg.controls.test <- pre.res$genes.neg
  clusters <- pre.res$clusters
  clusters.test <- clusters
  
  # Apply DANA pipeline
  DANA.test <- DANA(data.RC=test.RC, 
                    data.norm=test.norm, 
                    pos.controls=pos.controls.test, 
                    neg.controls=neg.controls.test, 
                    clusters=clusters.test,
                    coords=coords,
                    case.name="test",
                    generate.plots=config$generate.plots,
                    preprocess.transform=config$preprocess.transform,
                    corr.method.pos=config$corr.method.pos,
                    tuning.criterion.pos=config$tuning.criterion.pos,
                    corr.method.neg=config$corr.method.neg,
                    tuning.criterion.neg=config$tuning.criterion.neg) 
  
  if(!config$generate.plots) {
    stargazer(DANA.test$metrics, type="text", summary=FALSE, digits=4, 
              title="Comparison metrics for the mixed-batch data set", align=TRUE)
  }
  iplot.test <- iggplot.corr(DANA.test$data.model$pos$corr, clusters=clusters, title="Mixed-batch data set - Positive controls (un-normalized)", coords=coords)
  iplot.test.TMM <- iggplot.corr(DANA.test$norm.models$test.TMM$pos$corr, clusters=clusters, title="Mixed-batch data set - Positive controls (TMM)", coords=coords)
}
## 1013 genes has been filtered because they contains too small number of reads across the experiments.Number of positive control markers with RC in [64, inf): 110
## Number of negative control markers with RC in [2, 5]: 112
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## 
## Comparison metrics for the mixed-batch data set
## =============================
##                   cc    mscr 
## -----------------------------
##    test.TMM     0.9847 0.4744
##   test.DESeq    0.9884 0.4338
## test.PoissonSeq 0.9871 0.4759
##     test.TC     0.9799 0.0732
##    test.Med     0.9878 0.2762
##    test.RUVg    0.9800 0.4220
##    test.RUVs    0.8663 0.4412
##    test.RUVr    0.9703 0.4864
##     test.UQ     0.9928 0.2458
##     test.QN     0.9460 0.4832
## -----------------------------

Investigate Co-expression Structures

We compare the estimated co-expression structures for the mixed-batch and single-batch dataset. Here, we take a look at the models for the un-normalized data as well as TMM-normalized data. Note that you cannot directly compare the graphs of benchmark with test data since the considered set of genes is not identical.

if(settings$investigate.coexpression && settings$bench.DANA && settings$test.DANA) {
  htmltools::tagList(list(iplot.bench, iplot.bench.TMM, iplot.test, iplot.test.TMM))  # show plots in markdown
}

Differential Expression Analysis

Normalize Data

if(settings$perform.DEA) {
  ## Reset data
  test.RC <- test 
  bench.RC <- benchmark  
  
  ## Normalize test Data
  test.norm <- normalize(test.RC,
                         groups=test.groups,
                         name="test",
                         apply.TC=config.DEA$norm.apply.TC,
                         apply.UQ=config.DEA$norm.apply.UQ,
                         apply.med=config.DEA$norm.apply.med,
                         apply.TMM=config.DEA$norm.apply.TMM,
                         apply.DESeq=config.DEA$norm.apply.DESeq,
                         apply.PoissonSeq=config.DEA$norm.apply.PoissonSeq,
                         apply.QN=config.DEA$norm.apply.QN,
                         apply.RUV=FALSE)
  
  test.norm.RUV.adjust <- list(test.RUVr=norm.RUV(test.RC, test.groups, "RUVr")$adjust.factor,
                               test.RUVs=norm.RUV(test.RC, test.groups, "RUVs")$adjust.factor,
                               test.RUVg=norm.RUV(test.RC, test.groups, "RUVg")$adjust.factor)
}
## 1038 genes has been filtered because they contains too small number of reads across the experiments.

Benchmark (single-batch) DEA

if(settings$perform.DEA) {
  ## Perform DEA on benchmark dataset
  bench.DEA <- DE.voom(data.RC=bench.RC, groups=bench.groups, plot=config.DEA$generate.meanvar.plot)
  if(config.DEA$generate.volcano.plot) plot.DE.volcano(bench.DEA, alpha=config.DEA$alpha, q.values=config.DEA$q.values)
}

## number of DE genes: 48

Test (mixed-batch) DEA

if(settings$perform.DEA) {
  ## Perform DEA on full dataset
  test.DEA <- DE.voom(data.RC=test.RC, groups=test.groups, plot=config.DEA$generate.meanvar.plot, plot.title="test (no norm)")
  if(config.DEA$generate.volcano.plot) plot.DE.volcano(test.DEA, alpha=config.DEA$alpha, q.values=config.DEA$q.values, title="test (no norm)")
  
  ## DEA for normalized test data
  test.norm.DEA <- list()
  for(i in 1:length(test.norm)) {
    test.norm.DEA <- 
      append(test.norm.DEA, list(DE.voom(data.RC=test.norm[[i]], 
                                         groups=test.groups, 
                                         plot=config.DEA$generate.meanvar.plot,
                                         plot.title=names(test.norm)[i])))
    if(config.DEA$generate.volcano.plot) {
      plot.DE.volcano(test.norm.DEA[[i]], 
                      alpha=config.DEA$alpha, 
                      q.values=config.DEA$q.values, 
                      title=names(test.norm)[i])
    }
  }
  ## DEA for RUV normalization
  for(i in 1:length(test.norm.RUV.adjust)) {
    test.norm.DEA <- 
      append(test.norm.DEA, list(DE.voom(data.RC=test.RC, 
                                         groups=test.groups, 
                                         plot=config.DEA$generate.meanvar.plot,
                                         plot.title=names(test.norm.RUV.adjust)[i],
                                         adjust=test.norm.RUV.adjust[[i]])))
    if(config.DEA$generate.volcano.plot) {
      plot.DE.volcano(test.norm.DEA[[i]], 
                      alpha=config.DEA$alpha, 
                      q.values=config.DEA$q.values, 
                      title=names(test.norm.RUV.adjust)[i])
    }
  }
  names(test.norm.DEA) <- c(names(test.norm), names(test.norm.RUV.adjust))
}

## number of DE genes: 75

## number of DE genes: 24

## number of DE genes: 34

## number of DE genes: 31

## number of DE genes: 27

## number of DE genes: 33

## number of DE genes: 38

## number of DE genes: 38

## number of DE genes: 24

## number of DE genes: 34

## number of DE genes: 31

Results

if(settings$perform.DEA) {
  ## Compare test (non-norm) to benchmark
  test.norm.DEA.stats <- list(test=compare.DEAs(DEA=test.DEA,
                                           truth.DEA=bench.DEA,
                                           alpha=config.DEA$alpha))
  ## Compare normalized test data to benchmark
  for(i in 1:length(test.norm.DEA)) {
    test.norm.DEA.stats <- 
      append(test.norm.DEA.stats, 
             list(compare.DEAs(DEA=test.norm.DEA[[i]],
                               truth.DEA=bench.DEA,
                               alpha=config.DEA$alpha,
                               subset=switch(config.DEA$perform.subsetting+1,
                                             NULL,
                                             rownames(test.norm.DEA[[i]])))))
  }
  names(test.norm.DEA.stats) <- c("test.No Norm", names(test.norm.DEA))
  test.norm.DEA.stats <- test.norm.DEA.stats[1:length(test.norm.DEA.stats)]
  
  
  ## Visualize results
  
  # FNR-FDR Plot
  test.DEA.res <- data.frame(method=substr(names(test.norm.DEA.stats), 6, 20),
                             FDR=-sapply(test.norm.DEA.stats, function(x) x$FDR)+1,
                             FNR=-sapply(test.norm.DEA.stats, function(x) x$FNR)+1)
  p.test.DEA.FNR.FDR <- ggplot(test.DEA.res, aes(x=FNR, y=FDR, label=method)) +
      theme_bw() + 
      geom_point(alpha=1) + 
      xlab("Sensitivity") + # True positive rate(1-FNR)
      ylab("Precision") + # Positive predictive value (1-FDR)
      geom_text_repel(aes(label = method), size=3) +
      scale_x_continuous(labels = scales::percent, limits=c(0,1),
                         breaks = scales::pretty_breaks(n = 4)) +
      scale_y_continuous(labels = scales::percent, limits = c(0,1))
  print(p.test.DEA.FNR.FDR)
  
  # TPR-FPR Plot
  test.DEA.res <- data.frame(method=substr(names(test.norm.DEA.stats), 6, 20),
                             TPR=sapply(test.norm.DEA.stats, function(x) x$TPR),
                             FPR=-sapply(test.norm.DEA.stats, function(x) x$FPR)+1)
  p.test.DEA.TPR.FPR <- ggplot(test.DEA.res, aes(x=TPR, y=FPR, label=method)) + 
      theme_bw() + 
      geom_point(alpha=1) + 
      xlab("TPR") + 
      ylab("1-FPR") + 
      geom_text_repel(aes(label = method), size=3) +
      scale_x_continuous(labels = scales::percent, limits=c(0,1),
                         breaks = scales::pretty_breaks(n = 4)) +
      scale_y_continuous(labels = scales::percent, limits = c(0,1))
  print(p.test.DEA.TPR.FPR)
  
  # CAT Plot
  d <- append(list("test.No Norm"=test.DEA), test.norm.DEA)
  names(d) <- substr(names(d), 6, 20)
  p.test.DEA.CAT <- plot.CAT(DEA=d, 
                             truth.DEA=bench.DEA, 
                             title="Significance ranks of miRNAs",
                             maxrank=50)
  print(p.test.DEA.CAT)
  rm(d)
}

## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Results and Figures for Paper

Number of samples, markers, and positive and negative controls

if(settings$generate.paper.figs) {
  
# Dimension of data after analysis
cat("Mixed-batch data:\n")
cat("  - Dimensions: p=", dim(test.RC)[1],", n=", dim(test.RC)[2], "\n", sep="")
cat("  - Positive controls:\n")
cat("    * Definition mean RC in interval: [", config$t.well, ", inf ]\n", sep="")
cat("    * Number of positive controls miRNAs:", length(pos.controls.test), "\n")
cat("  - Negative controls:\n")
cat("    * Definition mean RC in interval: [", config$t.zero, ",", config$t.poor, "]\n")
cat("    * Number of negative controls miRNAs:", length(neg.controls.test), "\n")


cat("\n\nSingle-batch data:\n")
cat("  - Dimensions: p=", dim(bench.RC)[1],", n=", dim(bench.RC)[2], "\n", sep="")
cat("  - Positive controls:\n")
cat("    * Definition mean RC in interval: [", config$t.well, ", inf ]\n", sep="")
cat("    * Number of positive controls miRNAs:", length(pos.controls.bench), "\n")
cat("  - Negative controls:\n")
cat("    * Definition mean RC in interval: [", config$t.zero, ",", config$t.poor, "]\n")
cat("    * Number of negative controls miRNAs:", length(neg.controls.bench), "\n")

}
## Mixed-batch data:
##   - Dimensions: p=1881, n=48
##   - Positive controls:
##     * Definition mean RC in interval: [64, inf ]
##     * Number of positive controls miRNAs: 110 
##   - Negative controls:
##     * Definition mean RC in interval: [ 2 , 5 ]
##     * Number of negative controls miRNAs: 112 
## 
## 
## Single-batch data:
##   - Dimensions: p=1881, n=48
##   - Positive controls:
##     * Definition mean RC in interval: [64, inf ]
##     * Number of positive controls miRNAs: 127 
##   - Negative controls:
##     * Definition mean RC in interval: [ 2 , 5 ]
##     * Number of negative controls miRNAs: 123

Read Count Distribution in the Data

Per Data-Set

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  # Histogram plot test data set
  test.RC.log2 <- log2(rowMeans(test.RC)+1)
  df <- data.frame(log.expression=test.RC.log2)
  p.test.RC.hist <- ggplot(df, aes(x=log.expression)) + 
    geom_histogram(binwidth = .1, color="black", fill="black") +
    geom_vline(xintercept = log2(config$t.zero+1), color="#3051ab", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.poor+1), color="#3051ab", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.well+1), color="9e086c",  linetype="longdash") +
    ggtitle("Test data set") +
    theme_classic()
  print(p.test.RC.hist)
  rm(df)

  
  # Histogram plot benchmark data set
  bench.RC.log2 <- log2(rowMeans(bench.RC)+1)
  df <- data.frame(log.expression=bench.RC.log2)
  p.bench.RC.hist <- ggplot(df, aes(x=log.expression)) + 
    geom_histogram(binwidth = .1, color="black", fill="black") +
    geom_vline(xintercept = log2(config$t.zero+1), color="#3051ab", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.poor+1), color="#3051ab", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.well+1), color="#9e086c",  linetype="longdash") +
    ggtitle("Benchmark data set") +
    theme_classic()
  print(p.bench.RC.hist)
  rm(df)
  
  
  # Combined read count histogram for test and benchmark data
  df <- data.frame(bench=bench.RC.log2, test=test.RC.log2)
  p.RC.hist <- ggplot(df) + 
    theme_classic() +
    geom_histogram(aes(x=test, y=..count..,fill="#69b3a2"), binwidth = .1) +
    geom_histogram(aes(x=bench, y=-..count.., fill="#404080"), binwidth = .1) +
    geom_vline(xintercept = log2(config$t.zero+1), color="#5851b8", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.poor+1), color="#5851b8", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.well+1), color="#E7298A",  linetype="longdash") +
    geom_segment(aes(x = log2(config$t.zero+1), y = 180, xend = log2(config$t.poor+1), yend = 180),
                 arrow=arrow(length=unit(.07, "in"), ends="both"),
                 color="#5851b8") +
    annotate(geom="label", x=(log2(config$t.zero+1)+log2(config$t.poor+1))/2.0, y=200,
             label="neg. contr.", color="#5851b8", size=4, fill = alpha(c("white"),0.85), label.size = NA) +
    geom_segment(aes(x = log2(config$t.well+1), y = 180, xend = 10, yend = 180),
                 arrow=arrow(length=unit(.07, "in"), ends="last"),
                 color="#E7298A") +
    annotate(geom="label", x=log2(config$t.well+1)+.5, y=200,
             label="pos. contr.", color="#E7298A", size=4, hjust=0, fill = alpha(c("white"),0.85), label.size = NA) +
    scale_fill_manual(name="Data set",values=c("#404080", "#69b3a2"), labels=c("single-batch", "mixed-batch"), guide = guide_legend(reverse=TRUE)) +
    theme(legend.position=c(0.86,0.86), 
          legend.background=element_rect(fill=alpha('white', 0.9))) +
    scale_y_continuous(breaks=c(-200,-150,-100,-50,0,50,100,150,200), labels=c(200,150,100,50,0,50,100,150,200), limits=c(-200,200)) +
    xlab("Mean log2(read count)") +
    ylab("Frequency")
  print(p.RC.hist)
  rm(df)
  
    ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_RC_Distribution.pdf"), p.RC.hist, width = 5, height=4, device="pdf")
  }
  
}

## Warning: Removed 1 rows containing missing values (geom_bar).

## Warning: Removed 1 rows containing missing values (geom_bar).

## Warning: Removed 1 rows containing missing values (geom_bar).

## Warning: Removed 1 rows containing missing values (geom_bar).

Group Means comparison between mixed- and single-batch data

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  ## log2(mean(RC))
  
  # Histogram plot test data set
  test.RC.log2 <- log2(rowMeans(test.RC)+1)
  bench.RC.log2 <- log2(rowMeans(bench.RC)+1)
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.1 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("mixed-batch") + 
    ylab("single-batch") + 
    ggtitle("log2(mean(read count) + 1)") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.1)
  rm(df,test.RC.log2, bench.RC.log2)
  
  # Histogram plot test data set (END only)
  test.RC.log2 <- log2(rowMeans(test.RC[, test.groups=="END"])+1)
  bench.RC.log2 <- log2(rowMeans(bench.RC[, bench.groups=="END"])+1)
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.END.1 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("mixed-batch (END)") + 
    ylab("single-batch (END)") + 
    ggtitle(" END  ---  log2(mean(read count) + 1)") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.END.1)
  rm(df,test.RC.log2, bench.RC.log2)
  
  # Histogram plot test data set (SER only)
  test.RC.log2 <- log2(rowMeans(test.RC[, test.groups=="SER"])+1)
  bench.RC.log2 <- log2(rowMeans(bench.RC[, bench.groups=="SER"])+1)
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.SER.1 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("mixed-batch (SER)") + 
    ylab("single-batch (SER)") + 
    ggtitle(" SER  ---  log2(mean(read count) + 1)") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.SER.1)
  rm(df,test.RC.log2, bench.RC.log2)
  
  
  
  ## log2(mean(RC))
  
  # Histogram plot test data set
  test.RC.log2 <- rowMeans(log2(test.RC+1))
  bench.RC.log2 <- rowMeans(log2(bench.RC+1))
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.2 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("mixed-batch") + 
    ylab("single-batch") + 
    ggtitle("mean(log2(read count + 1))") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.2)
  rm(df,test.RC.log2, bench.RC.log2)
  
  # Histogram plot test data set (END only)
  test.RC.log2 <- rowMeans(log2(test.RC[, test.groups=="END"]+1))
  bench.RC.log2 <- rowMeans(log2(bench.RC[, bench.groups=="END"]+1))
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.END.2 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("mixed-batch (END)") + 
    ylab("single-batch (END)") + 
    ggtitle(" END  ---  mean(log2(read count + 1))") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.END.2)
  rm(df,test.RC.log2, bench.RC.log2)
  
  # Histogram plot test data set (SER only)
  test.RC.log2 <- rowMeans(log2(test.RC[, test.groups=="SER"]+1))
  bench.RC.log2 <- rowMeans(log2(bench.RC[, bench.groups=="SER"]+1))
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.SER.2 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("mixed-batch (SER)") + 
    ylab("single-batch (SER)") + 
    ggtitle(" SER  ---  mean(log2(read count + 1))") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.SER.2)
  rm(df,test.RC.log2, bench.RC.log2)
  
    ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_RC_Comparison_all.pdf"), p.RC.comparison.2, width = 5, height=5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "UCEC_RC_Comparison_END.pdf"), p.RC.comparison.END.2, width = 5, height=5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "UCEC_RC_Comparison_SER.pdf"), p.RC.comparison.SER.2, width = 5, height=5, device="pdf")
  }
  
}

Mean-Variance Plot

Distribution property among technical replicates

if(settings$generate.paper.figs) {
  # Function for mean-variance plot for a given data set
  mean.variance.plot <- function(data.RC, title, axis.limit=NULL) {
    df <- data.frame(data.mean=log10(rowMeans(data.RC) + 1),
                     data.var =log10(rowVars(data.RC) + 1))
    lin.fit <- lm(df$data.var ~ df$data.mean)$coefficients
    p <- ggplot(df,aes(x=data.mean,y=data.var)) + 
      geom_point(alpha=.25) + 
      xlab("log10(Mean)") + 
      ylab("log10(Variance)") + 
      geom_abline(intercept = lin.fit[1], slope = lin.fit[2], color="red", linetype="longdash", size=1) +
      geom_abline(intercept = 0, slope = 1, colour="blue") +
      annotate("text", alpha=1, x=4, y=0.5, hjust=0, vjust=0, size=3, colour="red",
               label=paste0("log10(Variance) = ", round(lin.fit[1],2), " + ", round(lin.fit[2],2), "*log10(Mean)")) + 
      xlim(0, ifelse(is.null(axis.limit), max(df), axis.limit)) +
      ylim(0, ifelse(is.null(axis.limit), max(df), axis.limit)) +
      ggtitle(title) +
      theme(aspect.ratio=1, plot.title = element_text(hjust = 0.5)) +
      theme_classic()
    return(p)
  }
  
  ## Test data
  # END and SER
  p.meanvar.test <- mean.variance.plot(test.RC, "Mixed-batch Data", axis.limit=12.5)
  print(p.meanvar.test)
  # END (endometrioid)
  p.meanvar.test.END <- mean.variance.plot(test.RC[, test.groups == "END"], "Mixed-batch Data - END (endometrioid)", axis.limit=12.5)
  print(p.meanvar.test.END)
  # SER (serous)
  p.meanvar.test.SER <- mean.variance.plot(test.RC[, test.groups == "SER"], "Mixed-batch Data - SER (serous)", axis.limit=12.5)
  print(p.meanvar.test.SER)
  
  ## Benchmark data
  # END and SER
  p.meanvar.bench <- mean.variance.plot(bench.RC, "Single-batch Data", axis.limit=12.5)
  print(p.meanvar.bench)
  # END
  p.meanvar.bench.END <- mean.variance.plot(bench.RC[, bench.groups == "END"], "Single-batch Data - END (endometrioid)", axis.limit=12.5)
  print(p.meanvar.bench.END)
  # SER
  p.meanvar.bench.SER <- mean.variance.plot(bench.RC[, bench.groups == "SER"], "Single-batch Data - SER (serous)", axis.limit=12.5)
  print(p.meanvar.bench.SER)
}

Marker-specific sd vs mean

if(settings$generate.paper.figs) {
  
  ## Mixed-batch data
  # END and SER
  p.meanvar.test <- plot.mean.sd(test.RC, config$t.zero, config$t.poor, config$t.well)
  print(p.meanvar.test)
  # END (endometrioid)
  p.meanvar.test.END <- plot.mean.sd(test.RC[, test.groups == "END"], config$t.zero, config$t.poor, config$t.well, "Mixed-batch Data - END (endometrioid)")
  print(p.meanvar.test.END)
  # SER
  p.meanvar.test.SER <- plot.mean.sd(test.RC[, test.groups == "SER"], config$t.zero, config$t.poor, config$t.well, "Mixed-batch Data - SER (serous)")
  print(p.meanvar.test.SER)
  
  ## Single-Batch data
  # END and SER
  p.meanvar.bench <- plot.mean.sd(bench.RC, config$t.zero, config$t.poor, config$t.well, "Single-Batch Data")
  print(p.meanvar.bench)
  # END (endometrioid)
  p.meanvar.bench.END <- plot.mean.sd(bench.RC[, bench.groups == "END"], config$t.zero, config$t.poor, config$t.well, "Single-Batch Data - END (endometrioid)")
  print(p.meanvar.bench.END)
  # SER
  p.meanvar.bench.SER <- plot.mean.sd(bench.RC[, bench.groups == "SER"], config$t.zero, config$t.poor, config$t.well, "Single-Batch Data - SER (serous)")
  print(p.meanvar.bench.SER)
  
  ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_Test_MeanSD.pdf"), p.meanvar.test + labs(title = NULL), width = 5, height=4, device="pdf")
  }
}

Correlation Plots

if(settings$generate.paper.figs) {
  num.genes.plot.pos <- 60
  num.genes.plot.neg <- 60

  # Co-expression models
  bench.model <- DANA.bench$data.model
  test.model <- DANA.test$data.model
  test.norm.model <- DANA.test$norm.models$test.TMM
  
  # Common control miRNAs
  pos.controls.bench <- rownames(bench.model$pos$corr)
  pos.controls.test <- rownames(test.model$pos$corr)
  pos.controls.common <- intersect(pos.controls.test, pos.controls.bench)
  neg.controls.bench <- rownames(bench.model$neg$corr)
  neg.controls.test <- rownames(test.model$neg$corr)
  # reduce the number of genes for the plot
  num.genes.plot.pos <- min(num.genes.plot.pos, length(pos.controls.common))
  pos.controls.plot <- pos.controls.common[1:num.genes.plot.pos]
  
  num.genes.plot.neg <- min(num.genes.plot.neg,
                            dim(test.model$neg$corr)[1],
                            dim(bench.model$neg$corr)[1])
  
  # Subsetted correlation matrices
  corr.bench.pos <- bench.model$pos$corr[pos.controls.plot, pos.controls.plot]
  corr.test.pos <- test.model$pos$corr[pos.controls.plot, pos.controls.plot]
  corr.test.TMM.pos <- test.norm.model$pos$corr[pos.controls.plot, pos.controls.plot]
  corr.bench.neg <- bench.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.test.neg <- test.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.test.TMM.neg <- test.norm.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  
  ### Generate plots
  # Positive controls
  p.corr.pos.bench <- ggplot.corr(corr.bench.pos, 
                                  clusters=clusters, 
                                  threshold=0, 
                                  title="Single-batch (un-normalized)",
                                  coords=coords)
  p.corr.pos.test <- ggplot.corr(corr.test.pos, 
                                 clusters=clusters, 
                                 threshold=0, 
                                 title="Mixed-batch (un-normalized)",
                                 coords=coords)
  p.corr.pos.test.TMM <- ggplot.corr(corr.test.TMM.pos, 
                                     clusters=clusters, 
                                     threshold=0, 
                                     title="Mixed-batch (TMM)",
                                     coords=coords)
  
  p.corr.pos <- arrangeGrob(p.corr.pos.bench + theme(legend.position="none"), 
                            p.corr.pos.test + theme(legend.position="none"),
                            get.legend(p.corr.pos.bench), 
                            widths = c(2,2,1))
  grid.arrange(p.corr.pos)  # display plot
  
  p.corr.pos.three <- arrangeGrob(p.corr.pos.bench + theme(legend.position="none"), 
                                  p.corr.pos.test.TMM + theme(legend.position="none"),
                                  p.corr.pos.test + theme(legend.position="none"),
                                  get.legend(p.corr.pos.bench + theme(legend.position = "bottom")),
                                  layout_matrix=rbind(c(1,2,3), c(4,4,4)),
                                  heights = c(5,1))
  grid.arrange(p.corr.pos.three)  # display plot
  
  # Negative controls
  p.corr.neg.bench <- ggplot.corr(corr.bench.neg, 
                                  clusters=clusters, 
                                  threshold=0, 
                                  title="Single-batch (un-normalized)")
  p.corr.neg.test <- ggplot.corr(corr.test.neg, 
                                 clusters=clusters, 
                                 threshold=0, 
                                 title="Mixed-batch (un-normalized)")
  p.corr.neg.test.TMM <- ggplot.corr(corr.test.TMM.neg, 
                                     clusters=clusters, 
                                     threshold=0, 
                                     title="Mixed-batch (TMM)")
  
  p.corr.neg <- arrangeGrob(p.corr.neg.bench + theme(legend.position="none"), 
                            p.corr.neg.test + theme(legend.position="none"),
                            get.legend(p.corr.neg.bench),
                            widths = c(2,2,1))
  grid.arrange(p.corr.neg)  # display plot
  
  
  p.corr.neg.three <- arrangeGrob(p.corr.neg.bench + theme(legend.position="none"), 
                                  p.corr.neg.test.TMM + theme(legend.position="none"),
                                  p.corr.neg.test + theme(legend.position="none"),
                                  get.legend(p.corr.neg.bench + theme(legend.position = "bottom")),
                                  layout_matrix=rbind(c(1,1,2,2,3,3), c(4,4,4,4,4,4)),
                                  heights = c(5,1))
  grid.arrange(p.corr.neg.three)  # display plot
  
  
  ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_CorrPos.pdf"), p.corr.pos, width = 8, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "UCEC_CorrNeg.pdf"), p.corr.neg, width = 8, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "UCEC_CorrPos_TMM.pdf"), p.corr.pos.three, width = 8, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "UCEC_CorrNeg_TMM.pdf"), p.corr.neg.three, width = 8, height=3.5, device="pdf")
  }
}
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

All normalization methods

if(settings$generate.paper.figs) { 
  p.corr.test <- ggplot.corr(DANA.test$data.model$pos$corr,
                             clusters=clusters,
                             title="mixed-batch (un-normalized)")
  p.corr.TMM <- ggplot.corr(DANA.test$norm.models$test.TMM$pos$corr,
                            clusters=clusters,
                            title="TMM")
  p.corr.DESeq <- ggplot.corr(DANA.test$norm.models$test.DESeq$pos$corr,
                            clusters=clusters,
                            title="DESeq")
  p.corr.PoissonSeq <- ggplot.corr(DANA.test$norm.models$test.PoissonSeq$pos$corr,
                            clusters=clusters,
                            title="PoissonSeq")
  p.corr.TC <- ggplot.corr(DANA.test$norm.models$test.TC$pos$corr,
                            clusters=clusters,
                            title="TC")
  p.corr.Med <- ggplot.corr(DANA.test$norm.models$test.Med$pos$corr,
                            clusters=clusters,
                            title="Med")
  p.corr.RUVg <- ggplot.corr(DANA.test$norm.models$test.RUVg$pos$corr,
                            clusters=clusters,
                            title="RUVg")
  p.corr.RUVs <- ggplot.corr(DANA.test$norm.models$test.RUVs$pos$corr,
                            clusters=clusters,
                            title="RUVs")
  p.corr.RUVr <- ggplot.corr(DANA.test$norm.models$test.RUVr$pos$corr,
                            clusters=clusters,
                            title="RUVr")
  p.corr.UQ <- ggplot.corr(DANA.test$norm.models$test.UQ$pos$corr,
                            clusters=clusters,
                            title="UQ")
  p.corr.QN <- ggplot.corr(DANA.test$norm.models$test.QN$pos$corr,
                            clusters=clusters,
                            title="QN")
  
  # single-batch data
  DANA.bench.plot <- DANA(data.RC=bench.RC, 
                     data.norm=bench.norm, 
                     pos.controls=pos.controls.test, 
                     neg.controls=neg.controls.bench, 
                     clusters=clusters.test,
                     coords=coords,
                     case.name="benchmark",
                     generate.plots=config$generate.plots,
                     preprocess.transform=config$preprocess.transform,
                     corr.method.pos=config$corr.method.pos,
                     tuning.criterion.pos=config$tuning.criterion.pos,
                     corr.method.neg=config$corr.method.neg,
                     tuning.criterion.neg=config$tuning.criterion.neg) 
  p.corr.bench <- ggplot.corr(DANA.bench.plot$data.model$pos$corr,
                              clusters=clusters,
                              title="single-batch (un-normalized)")
  
  # Arrange plots
  p.corr.test.all <-
    plot_grid(p.corr.bench + theme(legend.position="none"),
              p.corr.test + theme(legend.position="none"),
              p.corr.TMM + theme(legend.position="none"),
              p.corr.DESeq + theme(legend.position="none"),
              p.corr.PoissonSeq + theme(legend.position="none"),
              p.corr.QN + theme(legend.position="none"),
              p.corr.RUVg + theme(legend.position="none"),
              p.corr.RUVs + theme(legend.position="none"),
              p.corr.RUVr + theme(legend.position="none"),
              p.corr.Med + theme(legend.position="none"),
              p.corr.TC + theme(legend.position="none"),
              p.corr.UQ + theme(legend.position="none"),
              ncol=3)
  plot(p.corr.test.all)
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_Test_Corr_All.pdf"), p.corr.test.all, width=9, height=12, device="pdf")
  }
}
## Estimating correlations for pos. and neg. controls for data set benchmark...done
## Estimating correlations for pos. and neg. controls for data set benchmark.TMM...done
## Estimating correlations for pos. and neg. controls for data set benchmark.DESeq...done
## Estimating correlations for pos. and neg. controls for data set benchmark.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set benchmark.TC...done
## Estimating correlations for pos. and neg. controls for data set benchmark.Med...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVg...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVs...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVr...done
## Estimating correlations for pos. and neg. controls for data set benchmark.UQ...done
## Estimating correlations for pos. and neg. controls for data set benchmark.QN...done
## Comparing models benchmark vs. benchmark.TMM....done
## Comparing models benchmark vs. benchmark.DESeq....done
## Comparing models benchmark vs. benchmark.PoissonSeq....done
## Comparing models benchmark vs. benchmark.TC....done
## Comparing models benchmark vs. benchmark.Med....done
## Comparing models benchmark vs. benchmark.RUVg....done
## Comparing models benchmark vs. benchmark.RUVs....done
## Comparing models benchmark vs. benchmark.RUVr....done
## Comparing models benchmark vs. benchmark.UQ....done
## Comparing models benchmark vs. benchmark.QN....done

Correlation Frequency Plots

if(settings$generate.paper.figs) {
  models <- list(
    "Single-batch" = DANA.bench$data.model,
    "Mixed-batch" = DANA.test$data.model,
    "Mixed-batch (TMM)" = DANA.test$norm.models$test.TMM,
    "Mixed-batch (DESeq)" = DANA.test$norm.models$test.DESeq,
    "Mixed-batch (PoissonSeq)" = DANA.test$norm.models$test.PoissonSeq,
    "Mixed-batch (RUVg)" = DANA.test$norm.models$test.RUVg,
    "Mixed-batch (RUVs)" = DANA.test$norm.models$test.RUVs,
    "Mixed-batch (RUVr)" = DANA.test$norm.models$test.RUVr,
    "Mixed-batch (TC)" = DANA.test$norm.models$test.TC,
    "Mixed-batch (Med)" = DANA.test$norm.models$test.Med,
    "Mixed-batch (UQ)" = DANA.test$norm.models$test.UQ,
    "Mixed-batch (QN)" = DANA.test$norm.models$test.QN
  )
  
  # Set number of genes for negative correlation to minimum found
  num.genes.plot <- min(sapply(models, function(x) dim(x$neg$corr)[1]))
  
  # Subsetted correlation matrices
  corrs <- lapply(models, function(x) x$neg$corr[1:num.genes.plot, 1:num.genes.plot])
  corrs <- lapply(corrs, function(x) x[upper.tri(x)])
  
  control <- factor(
    as.vector(sapply(names(corrs), function(x) rep(x,length(corrs[[1]])))),
    levels = names(models) 
  )
  neg.corr <- data.frame(
    control=factor(control),
    corr=unname(unlist(corrs))
  )
  
  # plot insert
  p.insert <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_vline(xintercept = 0, size=.4, color="gray80", linetype="solid") +
    geom_freqpoly(binwidth=.05, alpha=.9, size=.3) +
    theme_bw() +
    xlim(-1, 1) +
    scale_color_manual(values=brewer.pal(n=12, name="Paired")) +
    theme(legend.position="none",
          axis.title.x = element_blank(),
          axis.title.y = element_blank())
  
  
  p.corr.frequency.neg.controls.all <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_vline(xintercept = 0, size=.4, color="gray80", linetype="solid") +
    geom_freqpoly(binwidth=.05, alpha=.9, size=.75) +
    theme_classic() +
    xlim(-1, 1) +
    ylim(0,200) +
    scale_color_manual(values=brewer.pal(n=12, name="Paired")) +
    theme(legend.position=c(0.84,0.8), 
          legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.5))) +
    ylab("Frequency") +
    xlab("Marginal correlation") + 
    theme(legend.key.width = unit(2.4,"cm")) +
    annotation_custom(ggplotGrob(p.insert), xmin=-1.1, xmax=-0.3, ymin=100, ymax=200)
  print(p.corr.frequency.neg.controls.all)
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_CorrDensityNeg_Freq_All.pdf"), p.corr.frequency.neg.controls.all, width = 8, height=8, device="pdf")
  }
}
## Warning: Removed 24 row(s) containing missing values (geom_path).

## Warning: Removed 24 row(s) containing missing values (geom_path).

## Warning: Removed 24 row(s) containing missing values (geom_path).

Correlation Scatter Plots

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  # un-normalized vs TMM
  p.scatter.test.TMM <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.TMM$pos$corr,
    clusters=clusters.test, title="TMM", xlab="un-normalized", ylab="TMM", 
    ccc=round(DANA.test$metrics["test.TMM", "cc"],3))
  # un-normalized vs DESeq
  p.scatter.test.DESeq <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.DESeq$pos$corr,
    clusters=clusters.test, title="DESeq", xlab="un-normalized", ylab="DESeq", 
    ccc=round(DANA.test$metrics["test.DESeq", "cc"],3))
  # un-normalized vs PoissonSeq
  p.scatter.test.PoissonSeq <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.PoissonSeq$pos$corr,
    clusters=clusters.test, title="PoissonSeq", xlab="un-normalized", ylab="PoissonSeq", 
    ccc=round(DANA.test$metrics["test.PoissonSeq", "cc"],3))
  # un-normalized vs TC
  p.scatter.test.TC <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.TC$pos$corr,
    clusters=clusters.test, title="TC", xlab="un-normalized", ylab="TC", 
    ccc=round(DANA.test$metrics["test.TC", "cc"],3))
  # un-normalized vs Med
  p.scatter.test.Med <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.Med$pos$corr,
    clusters=clusters.test, title="Med", xlab="un-normalized", ylab="Med", 
    ccc=round(DANA.test$metrics["test.Med", "cc"],3))
  # un-normalized vs RUVg
  p.scatter.test.RUVg <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.RUVg$pos$corr,
    clusters=clusters.test, title="RUVg", xlab="un-normalized", ylab="RUVg", 
    ccc=round(DANA.test$metrics["test.RUVg", "cc"],3))
  # un-normalized vs RUVs
  p.scatter.test.RUVs <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.RUVs$pos$corr,
    clusters=clusters.test, title="RUVs", xlab="un-normalized", ylab="RUVs", 
    ccc=round(DANA.test$metrics["test.RUVs", "cc"],3))
  # un-normalized vs RUVr
  p.scatter.test.RUVr <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.RUVr$pos$corr,
    clusters=clusters.test, title="RUVr", xlab="un-normalized", ylab="RUVr", 
    ccc=round(DANA.test$metrics["test.RUVr", "cc"],3))
  # un-normalized vs UQ
  p.scatter.test.UQ <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.UQ$pos$corr,
    clusters=clusters.test, title="UQ", xlab="un-normalized", ylab="UQ", 
    ccc=round(DANA.test$metrics["test.UQ", "cc"],3))
  # un-normalized vs QN
  p.scatter.test.QN <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.QN$pos$corr,
    clusters=clusters.test, title="QN", xlab="un-normalized", ylab="QN", 
    ccc=round(DANA.test$metrics["test.QN", "cc"],3))
  
  
  p.scatter.test.all <- plot_grid(p.scatter.test.TMM,
                                  p.scatter.test.DESeq,
                                  p.scatter.test.PoissonSeq,
                                  p.scatter.test.TC,
                                  p.scatter.test.Med,
                                  p.scatter.test.RUVg,
                                  p.scatter.test.RUVs,
                                  p.scatter.test.RUVr,
                                  p.scatter.test.UQ,
                                  p.scatter.test.QN,
                                  ncol = 3,
                                  align="hv")
  plot(p.scatter.test.all)
  
  
  # Save plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_Scatter_All.pdf"), p.scatter.test.all, width=9, height=12, device="pdf")
  }
}
## Warning in plot.corr.scatter(corr1 = DANA.test$data.model$pos$corr, corr2 = DANA.test$norm.models$test.RUVs$pos$corr, : Warning: Shapes of correlation matrices in plot.corr.scatter do not match!
##             Reducing correlation matrices to all mutual genes.
## Warning: Removed 2 rows containing missing values (geom_point).

DANA Result Metrics

if(settings$generate.paper.figs) { 
  options(scipen=4) # force non-scientific notation of x axis
  
  test.DANA.metrics <- data.frame(
    method=substr(rownames(DANA.test$metrics), 6, 20),
    cc=DANA.test$metrics[, "cc"],
    mscr=DANA.test$metrics[, "mscr"]
  )
  
  p.metrics.test <- ggplot(test.DANA.metrics,aes(x=mscr,y=cc, label=method)) + 
    geom_point(alpha=.5) + #pos=position_jitter(width=0.001, seed=1),
    theme_classic() + 
    xlab(TeX("$mscr^-$; Relative reduction of handling effects")) + 
    ylab(TeX("$cc^+$; Biological signal preservation")) + 
    geom_text_repel(aes(label = method), size=3, max.overlaps = Inf, min.segment.length = 0.2, force = 1, box.padding = 0.2) +
    # xlim(c(min(c(0, test.mposc.reduction)),max(test.mposc.reduction))) +
    scale_x_continuous(labels = scales::percent, limits=c(0,1.05), 
                       breaks = scales::pretty_breaks(n = 5)) +
    # ylim(c(0,1)) 
    scale_y_continuous(labels = scales::percent, limits = c(0.5,1))
  print(p.metrics.test)
  
  print("DANA Statistics for mixed-batch data")
  test.DANA.metrics$cc <- round(test.DANA.metrics$cc,3)
  test.DANA.metrics$mscr <- round(test.DANA.metrics$mscr,3)
  print(test.DANA.metrics)
  
  
  bench.DANA.metrics <- data.frame(
    method=substr(rownames(DANA.bench$metrics), 11, 30),
    cc=DANA.bench$metrics[, "cc"],
    mscr=DANA.bench$metrics[, "mscr"]
  )
  
  p.metrics.bench <- ggplot(bench.DANA.metrics, aes(x=mscr, y=cc, label=method)) + 
    geom_point(alpha=.5) +
    theme_classic() + 
    xlab(TeX("$mscr^-$; Relative reduction of handling effects")) + 
    ylab(TeX("$cc^+$; Biological signal preservation")) + 
    geom_text_repel(aes(label = method), size=3) +
    scale_x_continuous(labels = scales::percent, limits=c(0, 1.05),
                       breaks = scales::pretty_breaks(n = 5)) +
    scale_y_continuous(labels = scales::percent, limits = c(0.5, 1)) 
  print(p.metrics.bench)
  
  print("DANA Statistics for single-batch data")
  bench.DANA.metrics$cc <- round(bench.DANA.metrics$cc,3)
  bench.DANA.metrics$mscr <- round(bench.DANA.metrics$mscr,3)
  print(bench.DANA.metrics)
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_MetricsTest.pdf"), p.metrics.test, width=5.5, height=4.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "UCEC_MetricsBench.pdf"), p.metrics.bench, width=4.5, height=3.5, device="pdf")
  }
  
}

## [1] "DANA Statistics for mixed-batch data"
##        method    cc  mscr
## 1         TMM 0.985 0.474
## 2       DESeq 0.988 0.434
## 3  PoissonSeq 0.987 0.476
## 4          TC 0.980 0.073
## 5         Med 0.988 0.276
## 6        RUVg 0.980 0.422
## 7        RUVs 0.866 0.441
## 8        RUVr 0.970 0.486
## 9          UQ 0.993 0.246
## 10         QN 0.946 0.483
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## [1] "DANA Statistics for single-batch data"
##        method    cc   mscr
## 1         TMM 0.984  0.115
## 2       DESeq 0.994  0.107
## 3  PoissonSeq 0.994  0.150
## 4          TC 0.982 -0.269
## 5         Med 0.914  0.168
## 6        RUVg 0.983  0.056
## 7        RUVs 0.941  0.090
## 8        RUVr 0.970  0.197
## 9          UQ 0.924  0.128
## 10         QN 0.933  0.156
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text_repel).

DEA Results

if(settings$generate.paper.figs && settings$perform.DEA) { 
  # FNR-FDR Plot
  test.DEA.res <- data.frame(method=substr(names(test.norm.DEA.stats), 6, 20),
                             FDR=-sapply(test.norm.DEA.stats, function(x) x$FDR)+1,
                             FNR=-sapply(test.norm.DEA.stats, function(x) x$FNR)+1)
  p.test.DEA.FNR.FDR <- ggplot(test.DEA.res, aes(x=FNR, y=FDR, label=method)) +
      theme_classic() + 
      geom_point(alpha=.5) + 
      xlab("Sensitivity") + # True positive rate(1-FNR)
      ylab("Positive predictive value") + # Positive predictive value (1-FDR)
      geom_text_repel(aes(label = method), size=3, max.overlaps = Inf) + 
      scale_x_continuous(labels = scales::percent, limits=c(0,1),
                         breaks = scales::pretty_breaks(n = 4)) +
      scale_y_continuous(labels = scales::percent, limits = c(0,1))
  print(p.test.DEA.FNR.FDR)
  
  # CAT Plot
  d <- append(list("test.No Norm"=test.DEA), test.norm.DEA)
  names(d) <- substr(names(d), 6, 20)
  p.test.DEA.CAT.1 <- plot.CAT(DEA=d[1:7], 
                             truth.DEA=bench.DEA, 
                             title="Significance ranks of miRNAs",
                             maxrank=100)
  print(p.test.DEA.CAT.1)
  p.test.DEA.CAT.2 <- plot.CAT(DEA=d[c(1,8:11)], 
                             truth.DEA=bench.DEA, 
                             title="Significance ranks of miRNAs",
                             maxrank=100)
  print(p.test.DEA.CAT.2)
  rm(d)
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_DEA_FNR_FDR.pdf"), p.test.DEA.FNR.FDR, width=5.5, height=4.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "UCEC_DEA_CAT.pdf"), p.test.DEA.CAT, width=4.5, height=3.5, device="pdf")
  }
}

Arrange plots for paper

if(settings$generate.paper.figs && settings$perform.DEA) { 
  p.results.paper <- 
    plot_grid(p.RC.hist, 
              p.meanvar.test + labs(title = NULL),
              p.metrics.test,
              p.test.DEA.FNR.FDR,
              align="hv",
              labels=c("a", "b", "c", "d"))
  plot(p.results.paper)
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "UCEC_Results.pdf"), p.results.paper, width=10, height=8.5, device="pdf")
  }
}
## Warning: Removed 1 rows containing missing values (geom_bar).

## Warning: Removed 1 rows containing missing values (geom_bar).

Package Information

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] de_DE.UTF-8/de_DE.UTF-8/de_DE.UTF-8/C/de_DE.UTF-8/de_DE.UTF-8
## 
## attached base packages:
## [1] splines   stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggcorrplot_0.1.3            latex2exp_0.5.0            
##  [3] RColorBrewer_1.1-2          cowplot_1.1.1              
##  [5] openxlsx_4.2.4              ffpe_1.38.0                
##  [7] TTR_0.24.2                  DescTools_0.99.44          
##  [9] vsn_3.62.0                  RUVSeq_1.28.0              
## [11] EDASeq_2.28.0               ShortRead_1.52.0           
## [13] GenomicAlignments_1.30.0    Rsamtools_2.10.0           
## [15] Biostrings_2.62.0           XVector_0.34.0             
## [17] sva_3.42.0                  BiocParallel_1.28.2        
## [19] genefilter_1.76.0           mgcv_1.8-38                
## [21] nlme_3.1-153                PoissonSeq_1.1.2           
## [23] combinat_0.0-8              DESeq2_1.34.0              
## [25] SummarizedExperiment_1.24.0 Biobase_2.54.0             
## [27] MatrixGenerics_1.6.0        matrixStats_0.61.0         
## [29] GenomicRanges_1.46.1        GenomeInfoDb_1.30.0        
## [31] IRanges_2.28.0              S4Vectors_0.32.3           
## [33] BiocGenerics_0.40.0         edgeR_3.36.0               
## [35] limma_3.50.0                FastGGM_1.0                
## [37] RcppParallel_5.1.4          Rcpp_1.0.7                 
## [39] huge_1.3.5                  glmnet_4.1-3               
## [41] Matrix_1.3-4                ggrepel_0.9.1              
## [43] plotly_4.10.0               stargazer_5.2.2            
## [45] corrplot_0.92               ggnewscale_0.4.5           
## [47] gridExtra_2.3               ggplot2_3.3.5              
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                R.utils_2.11.0           
##   [3] tidyselect_1.1.1          RSQLite_2.2.9            
##   [5] AnnotationDbi_1.56.2      htmlwidgets_1.5.4        
##   [7] grid_4.1.2                munsell_0.5.0            
##   [9] codetools_0.2-18          preprocessCore_1.56.0    
##  [11] nleqslv_3.3.2             withr_2.4.3              
##  [13] colorspace_2.0-2          filelock_1.0.2           
##  [15] highr_0.9                 knitr_1.36               
##  [17] rstudioapi_0.13           labeling_0.4.2           
##  [19] GenomeInfoDbData_1.2.7    hwriter_1.3.2            
##  [21] farver_2.1.0              bit64_4.0.5              
##  [23] rhdf5_2.38.0              vctrs_0.3.8              
##  [25] generics_0.1.1            xfun_0.28                
##  [27] BiocFileCache_2.2.0       R6_2.5.1                 
##  [29] illuminaio_0.36.0         locfit_1.5-9.4           
##  [31] reshape_0.8.8             bitops_1.0-7             
##  [33] rhdf5filters_1.6.0        cachem_1.0.6             
##  [35] DelayedArray_0.20.0       assertthat_0.2.1         
##  [37] BiocIO_1.4.0              scales_1.1.1             
##  [39] rootSolve_1.8.2.3         gtable_0.3.0             
##  [41] affy_1.72.0               methylumi_2.40.1         
##  [43] lmom_2.8                  rlang_0.4.12             
##  [45] rtracklayer_1.54.0        lazyeval_0.2.2           
##  [47] GEOquery_2.62.1           BiocManager_1.30.16      
##  [49] yaml_2.2.1                crosstalk_1.2.0          
##  [51] GenomicFeatures_1.46.1    tools_4.1.2              
##  [53] nor1mix_1.3-0             affyio_1.64.0            
##  [55] ellipsis_0.3.2            lumi_2.46.0              
##  [57] jquerylib_0.1.4           siggenes_1.68.0          
##  [59] proxy_0.4-26              plyr_1.8.6               
##  [61] sparseMatrixStats_1.6.0   progress_1.2.2           
##  [63] zlibbioc_1.40.0           purrr_0.3.4              
##  [65] RCurl_1.98-1.5            prettyunits_1.1.1        
##  [67] openssl_1.4.5             bumphunter_1.36.0        
##  [69] zoo_1.8-9                 sfsmisc_1.1-12           
##  [71] magrittr_2.0.1            data.table_1.14.2        
##  [73] mvtnorm_1.1-3             aroma.light_3.24.0       
##  [75] hms_1.1.1                 evaluate_0.14            
##  [77] xtable_1.8-4              XML_3.99-0.8             
##  [79] jpeg_0.1-9                mclust_5.4.8             
##  [81] shape_1.4.6               compiler_4.1.2           
##  [83] biomaRt_2.50.1            minfi_1.40.0             
##  [85] tibble_3.1.6              KernSmooth_2.23-20       
##  [87] crayon_1.4.2              R.oo_1.24.0              
##  [89] htmltools_0.5.2           tzdb_0.2.0               
##  [91] tidyr_1.1.4               geneplotter_1.72.0       
##  [93] expm_0.999-6              Exact_3.1                
##  [95] DBI_1.1.1                 dbplyr_2.1.1             
##  [97] MASS_7.3-54               rappdirs_0.3.3           
##  [99] boot_1.3-28               readr_2.1.1              
## [101] quadprog_1.5-8            R.methodsS3_1.8.1        
## [103] parallel_4.1.2            igraph_1.2.9             
## [105] pkgconfig_2.0.3           xml2_1.3.3               
## [107] foreach_1.5.1             annotate_1.72.0          
## [109] rngtools_1.5.2            multtest_2.50.0          
## [111] beanplot_1.2              doRNG_1.8.2              
## [113] scrime_1.3.5              stringr_1.4.0            
## [115] digest_0.6.29             base64_2.0               
## [117] rmarkdown_2.11            gld_2.6.3                
## [119] DelayedMatrixStats_1.16.0 restfulr_0.0.13          
## [121] curl_4.3.2                rjson_0.2.20             
## [123] lifecycle_1.0.1           jsonlite_1.7.2           
## [125] Rhdf5lib_1.16.0           askpass_1.1              
## [127] viridisLite_0.4.0         fansi_0.5.0              
## [129] pillar_1.6.4              lattice_0.20-45          
## [131] KEGGREST_1.34.0           fastmap_1.1.0            
## [133] httr_1.4.2                survival_3.2-13          
## [135] glue_1.5.1                xts_0.12.1               
## [137] zip_2.2.0                 png_0.1-7                
## [139] iterators_1.0.13          bit_4.0.4                
## [141] class_7.3-19              stringi_1.7.6            
## [143] HDF5Array_1.22.1          blob_1.2.2               
## [145] latticeExtra_0.6-29       memoise_2.0.1            
## [147] dplyr_1.0.7               e1071_1.7-9